\documentclass[12pt, titlepage]{article}

\usepackage[UTF8]{ctex}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage{amsmath}

\author{eleve11}
\title{Linear Algebra--线代笔记}
\pagestyle{fancy}

\begin{document}

\maketitle

\begin{table}
    \centering
    \begin{tabular}{cc}
        $\mathbf{A}$ & 矩阵 \\
        $\mathbf{A}^{-1}$ & 矩阵的逆 \\
        $\mathbf{a}$ & 列向量 \\
        $\mathbf{a}^T$ & 行向量 \\
        $\det(\mathbf{A})$ & 矩阵$\mathbf{A}$的行列式 \\
        $\operatorname{Tr}(\mathbf{A})$ & 矩阵$\mathbf{A}$的迹 \\
        $\operatorname{diag}(\mathbf{A})$ & 矩阵$\mathbf{A}$的对角矩阵 \\
        $\operatorname{eig}(\mathbf{A})$ & 矩阵$\mathbf{A}$的特征值 \\
        $\mathbf{\varSigma}$ & 一个正定矩阵 \\
        $\mathbf{\varLambda}$ & 一个对角矩阵 \\
    \end{tabular}
\end{table}

\section{求偏导}
    矩阵求导基本假设可以用公式写成:
    \begin{displaymath}
        \frac{\partial X_{kl}}{\partial X_{ij}} = \delta _{ik}\delta _{lj}
    \end{displaymath}
    向量形式:
    \begin{displaymath}
        \left[\frac{\partial \mathbf{x}}{\partial y}\right]_i = \frac{\partial x_i}{\partial y} \qquad
        \left[\frac{\partial x}{\partial \mathbf{y}}\right]_i = \frac{\partial x}{\partial y_i} \qquad
        \left[\frac{\partial \mathbf{x}}{\partial \mathbf{y}}\right]_{ij} = \frac{\partial x_i}{\partial y_j}
    \end{displaymath}
    记住以下公式，对于推导公式非常有用：
    \begin{center}
        $\partial \mathbf{A} = 0$ \hspace{1.5cm} $\mathbf{A}$的元素都是常数 \\
        $\partial (\alpha \mathbf{X}) = \alpha \partial \mathbf{X}$ \\
        $\partial (\mathbf{X}+\mathbf{Y}) = \partial \mathbf{X} + \partial \mathbf{Y}$ \\
        $\partial (\operatorname{Tr}(\mathbf{X})) = \operatorname{Tr}(\partial \mathbf{X})$ \\
        $\partial (\mathbf{XY}) = (\partial \mathbf{X})\mathbf{Y} + \mathbf{X}(\partial \mathbf{Y})$ \\
        $\partial (\mathbf{X}^{-1}) = -\mathbf{X}^{-1}(\partial \mathbf{X})\mathbf{X}^{-1}$ \\
        $\partial (\det(\mathbf{X})) = \operatorname{Tr}(\operatorname{adj}(\mathbf{X}) \partial \mathbf{X})$ \\
        $\partial (\det(\mathbf{X})) = \det(\mathbf{X})\operatorname{Tr}(\mathbf{X}^{-1} \partial \mathbf{X})$ \\
        $\partial (\mathbf{X}^T) = (\partial \mathbf{X})^T$ \\
    \end{center}

\section{分子布局与分母布局}
    矩阵求导有两种布局，分子布局(numerator layout)和分母布局(denominator layout)。
    \begin{enumerate}
        \item \textbf{分子布局}(Jacobian formulation): 分子为$\mathbf{y}$或者分母为$\mathbf{x}^T$(即，分子为列向量或分母为行向量)
        \item \textbf{分母布局}(Hessian formulation): 分子为$\mathbf{y}^T$或者分母为$\mathbf{x}$(即，分子为行向量或分母为列向量)
    \end{enumerate}
    对待不同布局需要谨慎，不要在同一处推导中混用两种布局~\cite{bc}。
    这里假定所有非转置向量形式都是列向量：
    \begin{displaymath}
        \mathbf{y} = \begin{bmatrix}
            y_{1} \\
            y_{2} \\
            \vdots \\
            y_{n} \\
        \end{bmatrix}
    \end{displaymath}
    在\textbf{分子布局}(Numerator layout)下，\\
    向量$\mathbf{y}$对标量$x$的求导：
    \begin{displaymath}
        \frac{\partial \mathbf{y}}{\partial x} = \begin{bmatrix}
            \frac{\partial y_1}{\partial x} \\
            \frac{\partial y_2}{\partial x} \\
            \vdots \\
            \frac{\partial y_n}{\partial x}
        \end{bmatrix}
    \end{displaymath}
    标量$y$对向量$\mathbf{x}$的求导：
    \begin{displaymath}
        \frac{\partial y}{\partial \mathbf{x}} = \begin{bmatrix}
            \frac{\partial y}{\partial x_1} & \frac{\partial y}{\partial x_2} & \cdots & \frac{\partial y}{\partial x_n}
        \end{bmatrix}
    \end{displaymath}
    向量$\mathbf{y}$对向量$\mathbf{x}$的求导:
    \begin{displaymath}
        \frac{\partial \mathbf{y}}{\partial \mathbf{x}} = \begin{bmatrix}
          \frac{\partial y_{1}}{\partial x_{1}} & \frac{\partial y_{1}}{\partial x_{2}} & \cdots & \frac{\partial y_{1}}{\partial x_{n}}\\
          \frac{\partial y_{2}}{\partial x_{1}} & \frac{\partial y_{2}}{\partial x_{2}} & \cdots & \frac{\partial y_{2}}{\partial x_{n}}\\
          \vdots & \vdots & \ddots & \vdots\\
          \frac{\partial y_{n}}{\partial x_{1}} & \frac{\partial y_{n}}{\partial x_{2}} & \cdots & \frac{\partial y_{n}}{\partial x_{n}}
        \end{bmatrix}
    \end{displaymath}
    标量$y$对矩阵$\mathbf{X} \in \Re^{m \times n} $的求导(即，标量对矩阵转置后的每一项进行求导):
    \begin{displaymath}
        \frac{\partial y}{\partial \mathbf{X}} = \begin{bmatrix}
            \frac{\partial y}{\partial x_{11}} & \frac{\partial y}{\partial x_{21}} & \cdots & \frac{\partial y}{\partial x_{m1}} \\
            \frac{\partial y}{\partial x_{12}} & \frac{\partial y}{\partial x_{22}} & \cdots & \frac{\partial y}{\partial x_{m2}} \\
            \vdots & \vdots & \ddots & \vdots \\
            \frac{\partial y}{\partial x_{1n}} & \frac{\partial y}{\partial x_{2n}} & \cdots & \frac{\partial y}{\partial x_{mn}} \\
        \end{bmatrix}
    \end{displaymath}
    矩阵$\mathbf{Y} \in \Re^{m \times n} $对标量$x$的求导:
    \begin{displaymath}
        \frac{\partial \mathbf{Y}}{\partial x} = \begin{bmatrix}
            \frac{\partial y_{11}}{\partial x} & \frac{\partial y_{12}}{\partial x} & \cdots & \frac{\partial y_{1n}}{\partial x} \\
            \frac{\partial y_{21}}{\partial x} & \frac{\partial y_{22}}{\partial x} & \cdots & \frac{\partial y_{2n}}{\partial x} \\
            \vdots & \vdots & \ddots & \vdots \\
            \frac{\partial y_{m1}}{\partial x} & \frac{\partial y_{m2}}{\partial x} & \cdots & \frac{\partial y_{mn}}{\partial x} \\
        \end{bmatrix}
    \end{displaymath}
    在\textbf{分母布局}(Denominator layout)下，\\
    向量$\mathbf{y}$对标量$x$的求导：
    \begin{displaymath}
        \frac{\partial \mathbf{y}}{\partial x} = \begin{bmatrix}
            \frac{\partial y_1}{\partial x} & \frac{\partial y_2}{\partial x} & \cdots & \frac{\partial y_n}{\partial x}
        \end{bmatrix}
    \end{displaymath}
    标量$y$对向量$\mathbf{x}$的求导：
    \begin{displaymath}
        \frac{\partial y}{\partial \mathbf{x}} = \begin{bmatrix}
            \frac{\partial y}{\partial x_1} \\
            \frac{\partial y}{\partial x_2} \\
            \vdots \\
            \frac{\partial y}{\partial x_n}
        \end{bmatrix}
    \end{displaymath}
    向量$\mathbf{y}$对向量$\mathbf{x}$的求导:
    \begin{displaymath}
        \frac{\partial \mathbf{y}}{\partial \mathbf{x}} = \begin{bmatrix}
            \frac{\partial y_{1}}{\partial x_{1}} & \frac{\partial y_{2}}{\partial x_{1}} & \cdots & \frac{\partial y_{n}}{\partial x_{1}}\\
            \frac{\partial y_{1}}{\partial x_{2}} & \frac{\partial y_{2}}{\partial x_{2}} & \cdots & \frac{\partial y_{n}}{\partial x_{2}}\\
            \vdots & \vdots & \ddots & \vdots\\
            \frac{\partial y_{1}}{\partial x_{n}} & \frac{\partial y_{2}}{\partial x_{n}} & \cdots & \frac{\partial y_{n}}{\partial x_{n}}
        \end{bmatrix}
    \end{displaymath}
    标量$y$对矩阵$\mathbf{X} \in \Re^{m \times n} $的求导(即，标量对原矩阵每一项进行求导):
    \begin{displaymath}
        \frac{\partial y}{\partial \mathbf{X}} = \begin{bmatrix}
            \frac{\partial y}{\partial x_{11}} & \frac{\partial y}{\partial x_{12}} & \cdots & \frac{\partial y}{\partial x_{1n}} \\
            \frac{\partial y}{\partial x_{21}} & \frac{\partial y}{\partial x_{22}} & \cdots & \frac{\partial y}{\partial x_{2n}} \\
            \vdots & \vdots & \ddots & \vdots \\
            \frac{\partial y}{\partial x_{m1}} & \frac{\partial y}{\partial x_{m2}} & \cdots & \frac{\partial y}{\partial x_{mn}} \\
        \end{bmatrix}
    \end{displaymath}
    矩阵$\mathbf{Y} \in \Re^{m \times n} $对标量$x$的求导:
    \begin{displaymath}
        \frac{\partial \mathbf{Y}}{\partial x} = \begin{bmatrix}
            \frac{\partial y_{11}}{\partial x} & \frac{\partial y_{21}}{\partial x} & \cdots & \frac{\partial y_{m1}}{\partial x} \\
            \frac{\partial y_{12}}{\partial x} & \frac{\partial y_{22}}{\partial x} & \cdots & \frac{\partial y_{m2}}{\partial x} \\
            \vdots & \vdots & \ddots & \vdots \\
            \frac{\partial y_{1n}}{\partial x} & \frac{\partial y_{2n}}{\partial x} & \cdots & \frac{\partial y_{mn}}{\partial x} \\
        \end{bmatrix}
    \end{displaymath}

    \subsection{栗子}
        这个简单栗子来自The Matrix Cookbook~\cite{bb}的一个一阶(first-order)偏导，笔者进行了如下推导：
        \begin{displaymath}
            \frac{\partial \mathbf{x}^T\mathbf{a}}{\partial \mathbf{x}} = \frac{\partial \mathbf{a}^T\mathbf{x}}{\partial \mathbf{x}} = \mathbf{a}
        \end{displaymath}
        \begin{displaymath}
            \mathbf{x}^T\mathbf{a} = \mathbf{a}^T\mathbf{x} = \begin{bmatrix}
                x_1 a_1 + x_2 a_2 + \cdots + x_n a_n
            \end{bmatrix}
        \end{displaymath}
        根据分母布局可得:
        \begin{displaymath}
            \frac{\partial \mathbf{x}^T\mathbf{a}}{\partial \mathbf{x}} = \begin{bmatrix}
                \frac{\partial (x_1 a_1 + x_2 a_2 + \cdots + x_n a_n)}{\partial x_1} \\
                \frac{\partial (x_1 a_1 + x_2 a_2 + \cdots + x_n a_n)}{\partial x_2} \\
                \vdots \\
                \frac{\partial (x_1 a_1 + x_2 a_2 + \cdots + x_n a_n)}{\partial x_n}
            \end{bmatrix} = \mathbf{a}
        \end{displaymath}
        根据分子布局，则得到了不同的结果:
        \begin{displaymath}
            \frac{\partial \mathbf{x}^T\mathbf{a}}{\partial \mathbf{x}} = \begin{bmatrix}
                \frac{\partial (x_1 a_1 + x_2 a_2 + \cdots + x_n a_n)}{\partial x_1} & \frac{\partial (x_1 a_1 + x_2 a_2 + \cdots + x_n a_n)}{\partial x_2} & \cdots & \frac{\partial (x_1 a_1 + x_2 a_2 + \cdots + x_n a_n)}{\partial x_n}
            \end{bmatrix} = \mathbf{a}^T
        \end{displaymath}


\newpage
\begin{thebibliography}{99}
    \bibitem{ba} Jeremy Kun. A Programmer’s Introduction to Mathematics (2018)
    \bibitem{bb} Kaare Brandt Petersen, Michael Syskind Pedersen. The Matrix Cookbook (2012)
    \bibitem{bc} wiki. Matrix calculus (2019)
\end{thebibliography}

\end{document}
